*===============================================================================
*
*					WORKER BELIEFS ABOUT OUTSIDE OPTIONS
*		(c)	Simon Jaeger, Christopher Roth, Nina Roussille, Benjamin Schoefer
*							  2023 December 5
*						   	   	 Local Data 
*
*===============================================================================


********************************************************************************
*								Occupation Crosswalk						   *
********************************************************************************

* creating qualtrics - IAB occupation crosswalk 

clear

* Load the occupation classification workbook; its first row supplies the variable names.
import excel "$exp_data/3_digit_occupation.xlsx", firstrow clear

* Give the three workbook columns short working names in one step.
rename (KldB2010 FilterzurAuswahlderEbenen LangbezeichnungenderSystematik) ///
	(occupation occlevel occname)

* Remove the creation-date note row and rows whose code is empty or ".".
* occupation is deliberately left as a string here: the per-level sections
* below take substr() of it before converting it to a number.
drop if inlist(occupation, "Erstellungsdatum: 26.09.2013, DKT Statistik", "", ".")

* The hierarchy level is read from character 10 onwards of the filter text
* and then converted to a number.
replace occlevel = substr(occlevel, 10, 10)
destring occlevel, replace

* Level-5 codes with their names, saved for the occupation-search file.
* The data in memory is restored afterwards.
preserve

	drop if occlevel != 5

	* store the code as a number
	destring occupation, replace

	keep occupation occname

	save "${temp}/occupation_names.dta", replace

restore

* First column of the Excel file: level-1 codes and names.
* This is the top of the hierarchy, so no parent code is generated.
preserve

	drop if occlevel != 1

	keep occupation occname
	rename (occupation occname) (occupation1 occname1)

	* store the code as a number
	destring occupation1, replace

	save "${temp}/occupation_column1.dta", replace

restore

* Second to fifth column of the Excel file.
* For each level L in 2..5, save the level-L codes and names together with
* the code of the parent level (L-1), which is the first L-1 characters of
* the string code. The parent code is the merge key used further below.
forvalues lvl = 2/5 {

	local parent = `lvl' - 1

	preserve

		keep if occlevel==`lvl'

		* parent code must be cut from the string before the code is destringed
		gen occupation`parent' = substr(occupation,1,`parent')
		destring occupation`parent' occupation, replace

		keep occupation occname occupation`parent'
		rename (occupation occname) (occupation`lvl' occname`lvl')

		save "${temp}/occupation_column`lvl'.dta", replace

	restore
}


* Merging backwards: start from the level-5 file and attach levels 4, 3, 2, 1
* in turn. Each merge uses the parent code created for the level below it.
* Unmatched using-only rows are discarded (keep(master match)).
use "${temp}/occupation_column5.dta", clear

forvalues n = 4(-1)1 {
	merge m:1 occupation`n' using "${temp}/occupation_column`n'.dta", keep(master match) nogen
}

* Build the column order (code, name per level) and the sort key (codes only).
local ordered
local sortkey
forvalues k = 1/5 {
	local ordered `ordered' occupation`k' occname`k'
	local sortkey `sortkey' occupation`k'
}

order `ordered'

sort `sortkey'

export excel "${temp}/qualtrics_occupations_full.xlsx", replace

* Save the level-5 code (renamed to occupation) with its name occname5.
preserve

	rename occupation5 occupation
	keep occupation occname5

	save "${temp}/occnames_qualtrics.dta", replace

restore

* Export the distinct level-1 and level-2 occupation names, ordered by their
* numeric code, as one-column Excel files (one file per level).
foreach lvl in 1 2 {

	preserve

		keep occname`lvl' occupation`lvl'

		* Keep one row per name. Ordering by code within each name makes the
		* surviving row (the lowest code) reproducible; a plain -sort- on the
		* name alone orders tied observations randomly.
		bysort occname`lvl' (occupation`lvl'): keep if _n==1

		* list the names in code order
		sort occupation`lvl'

		keep occname`lvl'

		export excel "${temp}/qualtrics_occupation`lvl'.xlsx", replace

	restore
}

* Export only the five name columns (occname1 to occname5), one row per level-5 occupation.
keep occname?

export excel "${temp}/qualtrics_occupations.xlsx", replace


* NOTE: the files are exported as Excel on purpose. Open each Excel file and convert it to csv by hand.
* Exporting directly to csv creates annoying line breaks that mess up the dropdown menus.

